Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto, see <https://quarto.org>.
Running Code
When you click the Render button, a document will be generated that includes both content and the output of embedded code. You can embed code like this:
library("dplyr")
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library("ggplot2")
library("plotly")
library("forcats")
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ plotly::filter() masks dplyr::filter(), stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the hospital CSV that every later chunk works from.
hospital_data <- read.csv("../../../../data/hospital_data.csv")

# Top 20 facilities by mean "did not wait" percentage ----
# A facility can appear on several rows, so average within each facility
# before ranking.
plot_data <- hospital_data |>
  group_by(Facility.HHS.Desc) |>
  summarise(
    avg_no_wait = mean(
      Patients.who.did.not.wait.for.treatment....,
      na.rm = TRUE
    ),
    .groups = "drop"
  ) |>
  arrange(desc(avg_no_wait)) |>
  slice_max(avg_no_wait, n = 20)

# Horizontal bar chart; bars are ordered and shaded by the same value.
ggplot(
  plot_data,
  aes(
    x = reorder(Facility.HHS.Desc, avg_no_wait),
    y = avg_no_wait,
    fill = avg_no_wait
  )
) +
  geom_col(show.legend = FALSE) +
  scale_fill_gradient(low = "skyblue", high = "red") +
  coord_flip() +
  ggtitle("Top 20 Hospitals by % of Patients Who Left Without Treatment") +
  xlab("Hospital") +
  ylab("% of Patients Who Did Not Wait") +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.text.y = element_text(size = 11)
  )
# Bottom 20 facilities by mean "did not wait" percentage ----
# Average the percentage within each facility, then keep the 20 lowest.
bottom_20_plot_data <- hospital_data |>
  group_by(Facility.HHS.Desc) |>
  summarise(
    avg_no_wait = mean(
      Patients.who.did.not.wait.for.treatment....,
      na.rm = TRUE
    ),
    .groups = "drop"
  ) |>
  arrange(avg_no_wait) |>
  slice_min(avg_no_wait, n = 20)

# Horizontal bar chart of the 20 lowest averages.
ggplot(
  bottom_20_plot_data,
  aes(
    x = reorder(Facility.HHS.Desc, avg_no_wait),
    y = avg_no_wait,
    fill = avg_no_wait
  )
) +
  geom_col(show.legend = FALSE) +
  scale_fill_gradient(low = "forestgreen", high = "yellow") +
  coord_flip() +
  ggtitle("Bottom 20 Hospitals by % of Patients Who Left Without Treatment") +
  xlab("Hospital") +
  ylab("% of Patients Who Did Not Wait") +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.text.y = element_text(size = 11)
  )
# Top 20 hospitals by total attendances ----
# Sum attendances per facility (a facility can appear on several rows).
#
# Rows labelled "Queensland"/"QUEENSLAND" are dropped first, matching the
# exclusion already applied for the "Hospital Attendances" chart; presumably
# they are statewide totals, which would otherwise be ranked as a "hospital".
# `%in%` is used so rows with a missing facility name are kept, as before.
#
# NOTE(review): Triage.Category also contains "All"/"ALL" rows. If those are
# per-facility totals, summing over every row double-counts -- confirm.
top_20_attendance <- hospital_data %>%
  dplyr::filter(!toupper(Facility.HHS.Desc) %in% "QUEENSLAND") %>%
  group_by(Facility.HHS.Desc) %>%
  summarise(
    total_attendances = sum(Number.of.Attendances, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # slice_max() does the ranking itself, so no arrange() is needed first.
  slice_max(total_attendances, n = 20)

# Horizontal bar chart; reorder() sorts the bars, so row order is irrelevant.
ggplot(
  top_20_attendance,
  aes(
    x = reorder(Facility.HHS.Desc, total_attendances),
    y = total_attendances,
    fill = total_attendances
  )
) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(
    title = "Top 20 Hospitals by Number of Attendances",
    x = "Hospital",
    y = "Number of Attendances"
  ) +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.text.y = element_text(size = 11)
  )
# Top 20 facilities by average waiting time, triage category 1 ----
# Keep triage-1 rows, average the median waiting time per facility, take the
# 20 largest averages and pipe them straight into the chart.
hospital_data |>
  filter(Triage.Category == "1") |>
  group_by(Facility.HHS.Desc) |>
  summarise(
    avg_wait = mean(Median.Waiting.time.to.treatment..minutes., na.rm = TRUE),
    .groups = "drop"
  ) |>
  arrange(desc(avg_wait)) |>
  slice_max(avg_wait, n = 20) |>
  ggplot(aes(x = reorder(Facility.HHS.Desc, avg_wait), y = avg_wait)) +
  geom_col(fill = "darkred") +
  # Print the rounded value just past the end of each bar.
  geom_text(
    aes(label = round(avg_wait, 1)),
    hjust = -0.1,
    size = 3.5,
    colour = "black"
  ) +
  coord_flip() +
  # Fixed 0-120 minute value axis.
  ylim(0, 120) +
  ggtitle("Top 20 Hospitals by Average Wait Time (Triage 1)") +
  xlab("Hospital") +
  ylab("Average Waiting Time (Minutes)") +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.text.y = element_text(size = 11)
  )
# Mean attendances per facility, top 20 ----
# Rows whose facility name is "Queensland" in any letter case are removed so
# only the remaining facilities are ranked (the original listed the two
# spellings "Queensland" and "QUEENSLAND" separately).
#
# NOTE(review): the value plotted is the *mean* of Number.of.Attendances over
# a facility's rows, while the axis label reads "Number of Attendances" --
# confirm that the average is what is intended.
Top_20 <- hospital_data %>%
  dplyr::filter(toupper(Facility.HHS.Desc) != "QUEENSLAND") %>%
  group_by(Facility.HHS.Desc) %>%
  summarise(
    Number.of.Attendances = mean(Number.of.Attendances, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  slice_max(Number.of.Attendances, n = 20) %>%
  # One explicit sort is kept: fct_inorder() below takes the bar order from
  # the row order of this table.
  arrange(desc(Number.of.Attendances))

# Visualize using ggplot; forcats is attached with the other packages at the
# top of the document rather than in the middle of the analysis.
ggplot(
  Top_20,
  aes(x = fct_inorder(Facility.HHS.Desc), y = Number.of.Attendances)
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Hospital Attendances",
    x = "Facility",
    y = "Number of Attendances"
  ) +
  theme_minimal() +
  # Long facility names: tilt the labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Heatmap: average waiting time by triage category, top 10 hospitals ----

# Step 1: Identify the top 10 hospitals by total attendance.
# Rows labelled "Queensland"/"QUEENSLAND" are excluded first, matching the
# filter used for the "Hospital Attendances" chart; presumably they are
# statewide totals and would otherwise take one of the ten slots.
top_10_hospitals <- hospital_data |>
  dplyr::filter(!toupper(Facility.HHS.Desc) %in% "QUEENSLAND") |>
  group_by(Facility.HHS.Desc) |>
  summarise(
    total_attendance = sum(Number.of.Attendances, na.rm = TRUE),
    .groups = "drop"
  ) |>
  # slice_max() ranks on its own; a preceding arrange() adds nothing.
  slice_max(total_attendance, n = 10)

# Step 2: Filter the main dataset down to those hospitals.
top_data <- hospital_data |>
  dplyr::filter(Facility.HHS.Desc %in% top_10_hospitals$Facility.HHS.Desc)

# Step 3: Compute the average waiting time per hospital and triage category.
# Triage.Category holds both "All" and "ALL"; upper-casing merges them so the
# heatmap does not draw two separate columns for the same label.
heatmap_data <- top_data |>
  mutate(Triage.Category = toupper(Triage.Category)) |>
  group_by(Facility.HHS.Desc, Triage.Category) |>
  summarise(
    Avg_Wait = mean(Median.Waiting.time.to.treatment..minutes., na.rm = TRUE),
    .groups = "drop"
  )

# Step 4: Create the heatmap.
ggplot(
  heatmap_data,
  aes(x = Triage.Category, y = Facility.HHS.Desc, fill = Avg_Wait)
) +
  geom_tile(color = "white") +
  scale_fill_gradient(
    name = "Avg Wait (min)",
    low = "lightyellow",  # shortest waits
    high = "darkred",     # longest waits
    na.value = "grey90"   # cells with no usable waiting time
  ) +
  labs(
    title = "Avg Waiting Time (Top 10 Hospitals by Attendance)",
    x = "Triage Category",
    y = "Hospital"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.text.y = element_text(size = 9),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )
# Simple linear model ----
# Regress the median waiting time on triage category and attendance volume.
#
# Triage.Category contains the labels "All" and "ALL" in addition to the
# numbered categories; left in, they are fitted as two extra factor levels.
# They are excluded through `subset =` so only the individual categories are
# compared.
# NOTE(review): this assumes "All"/"ALL" mark aggregate rows rather than a
# genuine category -- confirm against the data dictionary.
lm_model <- lm(
  Median.Waiting.time.to.treatment..minutes. ~
    Triage.Category + Number.of.Attendances,
  data = hospital_data,
  subset = !toupper(Triage.Category) %in% "ALL"
)
summary(lm_model)
Call:
lm(formula = Median.Waiting.time.to.treatment..minutes. ~ Triage.Category +
Number.of.Attendances, data = hospital_data)
Residuals:
Min 1Q Median 3Q Max
-63.24 -49.29 17.57 38.75 47.62
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.573e+01 1.112e+00 50.115 < 2e-16 ***
Triage.Category2 -3.334e+00 1.532e+00 -2.177 0.02952 *
Triage.Category3 -1.425e+00 1.532e+00 -0.930 0.35249
Triage.Category4 3.045e+00 1.531e+00 1.989 0.04677 *
Triage.Category5 7.514e+00 1.530e+00 4.910 9.24e-07 ***
Triage.CategoryAll -4.580e+01 4.223e+00 -10.844 < 2e-16 ***
Triage.CategoryALL 4.218e+00 1.566e+00 2.694 0.00708 **
Number.of.Attendances -2.778e-05 1.602e-05 -1.734 0.08295 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 41.71 on 9266 degrees of freedom
(168 observations deleted due to missingness)
Multiple R-squared: 0.02211, Adjusted R-squared: 0.02137
F-statistic: 29.92 on 7 and 9266 DF, p-value: < 2.2e-16
# Scatter of waiting time against attendances with a fitted straight line ----
ggplot(
  hospital_data,
  aes(
    x = Number.of.Attendances,
    y = Median.Waiting.time.to.treatment..minutes.
  )
) +
  # Semi-transparent points so overlapping observations remain visible.
  geom_point(alpha = 0.5) +
  # Linear fit drawn with its confidence band.
  geom_smooth(method = "lm", se = TRUE, colour = "blue") +
  ggtitle("Effect of Attendance on Waiting Time") +
  xlab("Attendances") +
  ylab("Waiting Time (min)") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 168 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 168 rows containing missing values or values outside the scale range
(`geom_point()`).
You can add options to executable code like this:
[1] 4
The echo: false option disables the printing of code (only output is displayed).